********************************************************************************
* akm_estimation.do
* Purpose: Estimate firm employer fixed effects (AKM effects) using the
*          Abowd-Kramarz-Margolis (1999) two-way fixed effects model.
*
* The AKM model decomposes log wages as:
*   log w_it = alpha_i + psi_j(i,t) + X_it*beta + epsilon_it
* where alpha_i is a worker fixed effect and psi_j is a firm fixed effect.
*
* Estimation strategy:
*   1. Restrict to "movers" (workers who changed employer or had employment gaps)
*   2. Keep only firm-years with >= 5 average employees (non-missing employment)
*   3. Drop earnings below ~CAD 3,900 (real 2011) to exclude marginal jobs
*   4. Exclude firm-years at the time of the M&A deal itself
*   5. Identify the largest connected set of workers and firms
*   6. Partial out year effects, then use reghdfe to absorb worker and firm FEs
*
* Input : $data/worker_firm_panel.dta   (from worker_firm_panel.do)
*         $data/canada_cpi.dta          (CPI deflator)
*         $data/first_mna.dta           (deal year, to drop event years)
*         $data/firm_level_emp.dta      (employment, for size restriction)
*
* Output: $data/akm.dta
*         Variables: entid_syn, fe (employer fixed effect)
*         One row per firm (the estimated psi_j value).
*
* Requires: reghdfe, group2hdfe, gtools (for gegen) (user-written packages)
********************************************************************************
use $data/worker_firm_panel, clear

//------------------------------------------------------------------------------
// STEP 1: IDENTIFY MOVERS
//
// A worker-year observation is flagged moved == 1 when at least one holds:
//   (a) year == last_year_at_firm   (final year at this employer)
//   (b) year_forw > 1               (the next observation at the SAME employer
//                                    is more than one year later)
//   (c) unemployed == 1             (the next observation at ANY employer is
//                                    more than one year later)
// except that observations in the worker's last year in the data are always
// set to moved == 0. ever_moved is the worker-level maximum of moved; workers
// with ever_moved == 0 ("never-movers") are dropped in step 2, since the AKM
// firm effect is only identified from workers who switch firms.
//
// last_year_at_firm is not created in this file; it is presumably supplied by
// the input panel -- TODO confirm against worker_firm_panel.do.
//
// NOTE(review): gap and unemployed are computed from year[_n+1], i.e. they
// describe the break FOLLOWING an observation. Earlier documentation described
// unemployed as a gap that "precedes" the spell -- confirm which is intended.
//------------------------------------------------------------------------------

* Years until the worker's next observation at any employer
* (missing on the worker's final observation).
bysort casenum2019 (year entid_syn): gen gap = year[_n+1] - year

* 1 when the next observation is more than one year away, 0 otherwise.
gen unemployed = gap > 1 & !missing(gap)

* Last calendar year in which the worker appears in the data.
gegen last_year_of_data = max(year), by(casenum2019)

* Years until the next observation with the same employer
* (missing on the final observation of each worker-employer pair).
* This also leaves the data sorted by worker, employer, year.
bysort casenum2019 entid_syn (year): gen year_forw = year[_n+1] - year

* Observation-level mover flag: any of (a)-(c), but never in the worker's
* last year in the data. Evaluates to 0/1 with no missing values.
gen moved = ( year == last_year_at_firm                   ///
            | (year_forw > 1 & !missing(year_forw))       ///
            | unemployed == 1 )                           ///
            & year != last_year_of_data

* Worker-level flag: 1 if the worker has moved == 1 in any year.
gegen ever_moved = max(moved), by(casenum2019)


//------------------------------------------------------------------------------
// STEP 2: EARNINGS AND SAMPLE RESTRICTIONS
//
// Deflate earnings to real 2011 dollars, attach the deal year and firm-year
// employment, then drop observations that fail any sample restriction.
//------------------------------------------------------------------------------

* Tuning constants for this step:
*   cpi_2011     - CPI level in the 2011 base year
*   min_earnings - real-earnings floor (2011 CAD)
*   min_firm_emp - minimum firm-year average employment
local cpi_2011 119.9
local min_earnings 3900
local min_firm_emp 5

* Real earnings: rescale CPI so that 2011 == 1, then deflate T4 earnings.
merge m:1 year using $data/canada_cpi, keep(1 3) nogen
gen CPI_base_2011   = CPI/`cpi_2011'
gen t4earn_adjusted = t4earn/CPI_base_2011
gen logearnings     = log(t4earn_adjusted)

* Deal year per firm (used to exclude the event year from estimation).
merge m:1 entid_syn using $data/first_mna, keep(1 3) keepusing(DEAL_YEAR) nogen

* Firm-year average employment (used for the size restriction).
merge m:1 entid_syn year using $data/firm_level_emp, keep(1 3) keepusing(PD7_AvgEmp_NonZero) nogen

* Sample restrictions. Missing values are tested explicitly because Stata
* treats missing as larger than any number, so "< threshold" alone keeps them.
drop if ever_moved == 0                                                              // never-movers
drop if PD7_AvgEmp_NonZero < `min_firm_emp' | missing(PD7_AvgEmp_NonZero)           // small or unknown firm size
drop if t4earn_adjusted < `min_earnings' | missing(t4earn_adjusted)                 // marginal or missing earnings
drop if year == DEAL_YEAR                                                            // the deal year itself


//------------------------------------------------------------------------------
// STEP 3: AKM FIXED EFFECTS ESTIMATION
//
// (a) Drop workers observed only once in the estimation sample
// (b) Use group2hdfe to flag the largest connected set of workers and firms,
//     and keep only that set (firm effects are only comparable within a
//     connected set)
// (c) Remove year effects from log earnings with a pooled OLS on year dummies
// (d) Use reghdfe on the residual to absorb firm (fe) and worker (pe) effects
//
// NOTE(review): (c)+(d) is a sequential procedure; it is not numerically the
// same as including i.year directly in the reghdfe call. Kept as-is because
// the file header documents this two-step strategy -- confirm it is intended.
//------------------------------------------------------------------------------

* (a) Drop workers with a single observation in the estimation sample.
bysort casenum2019: drop if _N == 1

* (b) Restrict to the largest connected set of workers and firms.
group2hdfe casenum2019 entid_syn, group(mygroup) largest(largest_connected_set) verbose
keep if largest_connected_set == 1
drop mygroup largest_connected_set

* Keep only the variables needed for estimation.
keep  casenum2019 logearnings year entid_syn
order casenum2019 logearnings year entid_syn

* (c) Residualize log earnings on year dummies.
reg logearnings i.year
predict logearnings_tilde, resid           // log earnings net of year effects

* (d) Absorb firm and worker fixed effects from the year-adjusted log earnings.
*   fe              = estimated firm fixed effect   (psi_j in AKM notation)
*   pe              = estimated worker fixed effect (alpha_i; created in memory
*                     but not written to the output file)
*   worker_firm_res = regression residual
* NOTE(review): tol(0.001) is far looser than reghdfe's documented default
* convergence tolerance; confirm this is a deliberate speed/precision trade-off.
reghdfe logearnings_tilde, absorb(fe = entid_syn pe = casenum2019) tol(0.001) verbose(1) resid(worker_firm_res)


//------------------------------------------------------------------------------
// STEP 4: SAVE FIRM FIXED EFFECTS
//
// Collapse to one row per firm. The firm effect is constant across all rows of
// a firm, so keeping the first row within each entid_syn loses no information.
// Output variables: entid_syn, fe.
//------------------------------------------------------------------------------
keep if !missing(fe)        // drop observations where the FE was not estimated
keep fe entid_syn

* Keep exactly the first row of every firm. _n is 1-based within a by-group,
* so testing _n == 1 retains firms that have only a single row as well
* (a 0-based counter compared against 1 would drop those firms entirely).
bysort entid_syn: keep if _n == 1

compress
save $data/akm.dta, replace
